*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file generates outputs that summarize various attributes of prime locations
* This do file is run for various p-values (defined by global $SL)
* The preferred outputs will be chosen later

* Load the grid-cell data for the current p-value (global SL)
	use "$temp/grid_PL_output_p${SL}/PL_grids_p${SL}__all.dta", clear

* Cell area and PL-level aggregates
* Every grid cell gets the same area, (0.25)^2, labelled as sq. km
	generate grid_area = (0.25)^2
	label variable grid_area "Area in sq. km"
* Remove a PL_area variable if the input already carries one, then rebuild it
	capture drop PL_area
* Total area and mean coordinates within each cbsafp-PLID group
	egen PL_area = sum(grid_area), by(cbsafp PLID)
	egen PL_lat  = mean(lat),      by(cbsafp PLID)
	egen PL_lon  = mean(lon),      by(cbsafp PLID)

* Sector employment and sector shares (in percent) for each individual PL
* (unlike the "inside" variables, which refer to all PLs in a city, these are by PL)
* The totals are only filled for cells with PL == 1
	foreach sector in MFG NT PS TS O {
		egen PL_`sector'_emp = sum(cell_`sector'_emp) if PL == 1, by(cbsafp PLID)
		generate PL_`sector'empshare = PL_`sector'_emp / PL_emp * 100
	}

* Condense to a PL-level data set: keep the group-level variables, drop the
* cell-level PL_x and PL_y, and remove rows that are exact duplicates
	keep cbsafp metro_name PL* metro_emp PL_*empshare PLrankByCity PPL *empshare_outsidePL PLID_US
	drop PL_x PL_y
	duplicates drop

* Rank all rows by PL employment across the US (field ranking: largest gets rank 1)
	egen PLrankUS = rank(PL_emp), field

* Sort by US rank so that the largest prime locations come first
	gsort +PLrankUS

* CBD indicator: 1 for the top-ranked location within its CBSA, 0 otherwise
	generate CBD = (PLrankByCity == 1)

* Pseudo PLs (PPL == 1) do not count as prime locations
	replace PL = 0 if PPL == 1

* Report how many PLs we have, overall and among CBDs
	display "Number of real prime locations"
	tabulate PL
	display "How many CBDs are prime locations"
	tabulate PL if CBD == 1

* Keep key variables and put the US-wide PL identifier first
	local keyvars cbsafp metro_name PLrankUS PLrankByCity PL_lat PL_lon PL_area PL_emp PL_empdens PL_*empshare metro_emp PL CBD *empshare_outsidePL
	keep `keyvars' PLID_US
	order PLID_US `keyvars'

* Rescale employment, CBSA employment and density by a factor of 1000
* (the labels below report them in thousands)
	foreach v of varlist PL_emp metro_emp PL_empdens {
		replace `v' = `v' / 1000
	}

* Label variables
	label variable metro_name   "CBSA"
	label variable PLrankByCity "Rank within CBSA"
	label variable PLrankUS     "Rank within US"
	label variable PL_area      "Area in km{superscript:2}"
	label variable PL_emp       "Total employment (in thousands)"
	label variable metro_emp    "CBSA employment (in thousands)"
	label variable PL_empdens   "Density (1000 jobs / km{superscript:2})"
	foreach sector in MFG NT PS TS O {
		label variable PL_`sector'empshare "`sector' share (%)"
	}
	label variable CBD "Central Business District (largest employment cluster in CBSA)"
	label variable PL  "Prime location (enough employment)"

* Save prime locations data set for toolkit (Stata and CSV copies)
* mkdir is wrapped in capture so that an existing folder does not stop the script
	capture mkdir "$temp/US-CBSAs"
	capture mkdir "$temp/US-CBSAs/p${SL}"
	save "$temp/US-CBSAs//p${SL}/PL-data.dta", replace
	export delimited using "$temp/US-CBSAs//p${SL}/PL-data.csv", replace
	
* Illustrate distributions
* Four panels (area, employment, density, TS share). Each overlays two histograms
* of real PLs (PL == 1): CBSAs with metro_emp < 1000 in blue and CBSAs with
* metro_emp >= 1000 in red. metro_emp was divided by 1000 above, so the cut-off
* of 1000 corresponds to one million jobs. The legend labels state exactly these
* two conditions (below 1M versus 1M or more).
* Vertical lines mark the group means.
* NOTE(review): the means are computed over all rows of each group, i.e. without
* the PL == 1 filter and without the trimming applied in the histograms - confirm
* that this is intended.

* Panel 1: area (trimmed at 50); this panel carries the legend reused by grc1leg
	summarize PL_area if metro_emp < 1000
	local mean_small = r(mean)
	summarize PL_area if metro_emp >= 1000
	local mean_large = r(mean)
	twoway (hist PL_area if PL==1 & metro_emp < 1000 & PL_area < 50, color(blue%50) start(0) width(0.4)) ///
		   (hist PL_area if PL==1 & metro_emp >= 1000 & PL_area < 50, color(red%50) start(0) width(0.4)) ///
		   , graphregion(color(white)) name(area, replace) ///
		   legend(order(1 "CBSA employment < 1M" 2 "CBSA employment >= 1M") pos(6) cols(2) region(lstyle(none))) ///
		   xline(`mean_small', lcolor(blue) lpattern(shortdash)) xline(`mean_large', lcolor(red) lpattern(longdash))

* Panel 2: employment in thousands (trimmed at 250)
	summarize PL_emp if metro_emp < 1000
	local mean_small = r(mean)
	summarize PL_emp if metro_emp >= 1000
	local mean_large = r(mean)
	twoway (hist PL_emp if PL==1 & metro_emp < 1000 & PL_emp < 250, color(blue%50) start(0) width(5)) ///
		   (hist PL_emp if PL==1 & metro_emp >= 1000 & PL_emp < 250, color(red%50) start(0) width(5)) ///
		   , graphregion(color(white)) name(emp, replace) xlabel(0[50]250) ///
		   xline(`mean_small', lcolor(blue) lpattern(shortdash)) xline(`mean_large', lcolor(red) lpattern(longdash))

* Panel 3: employment density (trimmed at 100)
	summarize PL_empdens if metro_emp < 1000
	local mean_small = r(mean)
	summarize PL_empdens if metro_emp >= 1000
	local mean_large = r(mean)
	twoway (hist PL_empdens if PL==1 & metro_emp < 1000 & PL_empdens < 100, color(blue%50) start(0) width(1.6)) ///
		   (hist PL_empdens if PL==1 & metro_emp >= 1000 & PL_empdens < 100, color(red%50) start(0) width(1.6)) ///
		   , graphregion(color(white)) name(empdens, replace) ///
		   xline(`mean_small', lcolor(blue) lpattern(shortdash)) xline(`mean_large', lcolor(red) lpattern(longdash))

* Panel 4: TS employment share in percent (not trimmed)
	summarize PL_TSempshare if metro_emp < 1000
	local mean_small = r(mean)
	summarize PL_TSempshare if metro_emp >= 1000
	local mean_large = r(mean)
	twoway (hist PL_TSempshare if PL==1 & metro_emp < 1000 , color(blue%50) start(0) width(1.6)) ///
		   (hist PL_TSempshare if PL==1 & metro_emp >= 1000 , color(red%50) start(0) width(1.6)) ///
		   , graphregion(color(white)) name(TS, replace) ///
		   xline(`mean_small', lcolor(blue) lpattern(shortdash)) xline(`mean_large', lcolor(red) lpattern(longdash))

* Combine the four panels in a 2-column layout with one shared legend
	grc1leg area emp empdens TS, graphregion(color(white)) xsize(10) ysize(5) cols(2)
* Write Appendix Figure B.2.1 for given p-value (figure for preferred p-value will be chosen later)
	capture mkdir "$temp/figures_App"
	capture mkdir "$temp/figures_App/US-CBSAs"
	capture mkdir "$temp/figures_App/US-CBSAs/p${SL}"
	graph export  "$temp/figures_App/US-CBSAs/p${SL}/FIG_B2_1_US-PL-histograms.pdf", replace
	
* Analyse specialization
* PL_count is the largest within-CBSA rank, used as the number of PLs per CBSA
	egen PL_count = max(PLrankByCity), by(cbsafp)
	label var PL_count "\# prime locations (PL)"
	label var PLrankByCity "PL rank"

* Herfindahl-Hirschman index per PL: sum of squared sector shares.
* Shares are stored in percent, hence the division by 100 to land in [0,1].
	gen HHI = 0
	foreach var of varlist PL_MFGempshare PL_NTempshare PL_PSempshare PL_TSempshare PL_Oempshare {
		replace HHI = HHI + (`var'/100)^2
	}
* Same index for employment outside PLs.
* NOTE(review): this list has six sectors (it includes STempshare_outsidePL),
* whereas HHI above and the table note use five - confirm that ST belongs here.
	gen HHI_outside = 0
	foreach var of varlist MFGempshare_outsidePL NTempshare_outsidePL PSempshare_outsidePL TSempshare_outsidePL Oempshare_outsidePL STempshare_outsidePL {
		replace HHI_outside = HHI_outside + (`var'/100)^2
	}
	label var HHI "Herfindahl–Hirschman Index (HHI) [0,1]"
	label var HHI_outside "HHI, outside PL"

* Six models: for each sample (all, emp. < 1M, emp. >= 1M) one specification
* without CBSA effects and one that absorbs cbsafp. Standard errors are
* clustered on cbsafp. MSAFE and MSAs are stored as extra rows for the table.
	eststo: reg HHI PL_count HHI_outside , cluster(cbsafp)
		qui estadd local MSAFE "-"
		qui estadd local MSAs "All"
	eststo: reg HHI PLrankByCity , abs(cbsafp) cluster(cbsafp)
		qui estadd local MSAFE "Yes"
		qui estadd local MSAs "All"
	eststo: reg HHI PL_count HHI_outside if  metro_emp < 1000, cluster(cbsafp)
		qui estadd local MSAFE "-"
		qui estadd local MSAs "Emp. < 1M"
	eststo: reg HHI PLrankByCity if  metro_emp < 1000, abs(cbsafp) cluster(cbsafp)
		qui estadd local MSAFE "Yes"
		qui estadd local MSAs "Emp. < 1M"
	eststo: reg HHI PL_count HHI_outside if  metro_emp >= 1000, cluster(cbsafp)
		qui estadd local MSAFE "-"
		qui estadd local MSAs "Emp. $\geq$ 1M"
	eststo: reg HHI PLrankByCity if  metro_emp >= 1000, abs(cbsafp) cluster(cbsafp)
		qui estadd local MSAFE "Yes"
		qui estadd local MSAs "Emp. $\geq$ 1M"

* Export Table B.2.4 for given p-value (table for preferred p-value will be chosen later)
* The row labels are literal strings in compound quotes; writing them as local
* macro references inside plain quotes would not produce the intended text.
* Significance stars are switched off (nostar).
	capture mkdir "$temp/tables_App"
	capture mkdir "$temp/tables_App/US-CBSAs"
	capture mkdir "$temp/tables_App/US-CBSAs/p${SL}"
	esttab  using "$temp/tables_App/US-CBSAs/p${SL}/TAB_B2_4_Specialization.tex", replace b(3) se(4) label compress   r2(3) ///
		stats(MSAFE MSAs N r2 , fmt(%18.3g ) ///
			labels(`"MSA effects"' `"MSAs"' `"Observations"' `"\(R^{2}\)"' )) ///
		title("Prime location specialization") modelwidth(6) nogap nostar ///
		addnote( "Unit of observation is prime location. Herfindahl–Hirschman Index is an index of sector specialization by prime location calculated using the following sectors: Manufacturing and wholesale, tradable services, non-tradable services, public services, other sectors. Standard errors (in parenthesis) clustered on MSAs." )
		eststo clear

* Append three empty rows that will hold the summary means
	local obs = _N+3
	display `obs'
	set obs `obs'

* Fill the appended rows, variable by variable:
*   row _N-2: mean over the first ten rows (data are sorted by US rank)
*   row _N-1: mean over rows with metro_emp >= 1000
*   row _N  : mean over rows with metro_emp <  1000
* The appended rows have missing metro_emp, and Stata treats missing as larger
* than any number, so metro_emp >= 1000 alone would be true for them. Without
* the !missing() guard the top-10 mean just written to row _N-2 would enter
* the mean for the large CBSAs.
	foreach var of varlist PL_area PL_emp PL_empdens PL_*empshare *empshare_outsidePL {
		sum `var' if _n <= 10
		replace `var'=r(mean) if _n==_N-2
		sum `var' if metro_emp >= 1000 & !missing(metro_emp)
		replace `var'=r(mean) if _n==_N-1
		sum `var' if metro_emp < 1000
		replace `var'=r(mean) if _n==_N
		}

* Drop all non-top-10 rows, keeping the three summary rows at the end
	drop if _n > 10 & _n <= _N-3

* Name the summary rows
	replace metro_name = "Mean, top-10 CBSAs" if _n==_N-2
	replace metro_name = "Mean, CBSA emp. >= 1M " if _n==_N-1
	replace metro_name = "Mean, CBSA emp. < 1M " if _n==_N

* Convert the numeric columns to formatted strings for the table
* (force lets tostring proceed even though the display format rounds the values)
	tostring PL_area, replace force format(%9.2f)
	tostring PL_emp, replace force format(%12.0fc)
	tostring PL_empdens, replace force format(%12.0fc)
	foreach ind in MFG NT PS TS O  {
		tostring PL_`ind'empshare, replace force format(%9.1fc)
		tostring `ind'empshare_outsidePL, replace force format(%9.1fc)
	}
	
* Describe PL
* Replace the numeric PL indicator by a text column with a hand-written name for
* selected prime locations, matched on the full CBSA name and the within-CBSA rank
	drop PL
	gen PL = ""
	replace PL = "Midtown" if metro_name == "New York-Newark-Jersey City, NY-NJ-PA" & PLrankByCity == 1
	replace PL = "Lower Manhattan" if metro_name == "New York-Newark-Jersey City, NY-NJ-PA" & PLrankByCity == 2
	replace PL = "The Loop" if metro_name == "Chicago-Naperville-Elgin, IL-IN-WI" & PLrankByCity == 1
	replace PL = "Downtown D.C." if metro_name == "Washington-Arlington-Alexandria, DC-VA-MD-WV" & PLrankByCity == 1
	replace PL = "Downtown Boston" if metro_name == "Boston-Cambridge-Newton, MA-NH" & PLrankByCity == 1
	replace PL = "Financial District" if metro_name == "San Francisco-Oakland-Hayward, CA" & PLrankByCity == 1
	replace PL = "City Center" if metro_name == "Philadelphia-Camden-Wilmington, PA-NJ-DE-MD" & PLrankByCity == 1
	replace PL = "Seattle" if metro_name == "Seattle-Tacoma-Bellevue, WA" & PLrankByCity == 1
	replace PL = "Downtown Los Angeles" if metro_name == "Los Angeles-Long Beach-Anaheim, CA" & PLrankByCity == 1
	replace PL = "Downtown Atlanta" if metro_name == "Atlanta-Sandy Springs-Roswell, GA" & PLrankByCity == 1

* Pick first city in CBSA name as city name
* Only rows of actual locations are shortened. The three appended mean rows have
* missing PLrankUS and keep their full label; otherwise the hyphen in
* "Mean, top-10 CBSAs" would cut that label down to "Mean, top".
	split metro_name, parse("-")
	replace metro_name = metro_name1 if !missing(PLrankUS)
* Finalize: label the text column, keep the table variables and arrange them so
* that each PL share is followed by the matching share outside PLs
	label variable PL "Prime location"
	keep metro_name PL PLrankUS PLrankByCity PL_area PL_emp PL_empdens PL_*empshare *empshare_outsidePL
	order metro_name PL PLrankUS PLrankByCity PL_area PL_emp PL_empdens ///
		PL_TSempshare  TSempshare_outsidePL ///
		PL_MFGempshare MFGempshare_outsidePL ///
		PL_NTempshare  NTempshare_outsidePL ///
		PL_PSempshare  PSempshare_outsidePL ///
		PL_Oempshare   Oempshare_outsidePL

* Write Table 1 for given p-value, except last columns (table for preferred p-value will be chosen later, last two columns will be added later)
* NOTE(review): nine columns are exported but align() lists eight specifiers -
* confirm whether the later step that adds columns also rewrites the alignment.
	capture mkdir "$temp/tables"
	capture mkdir "$temp/tables/US-CBSAs"
	capture mkdir "$temp/tables/US-CBSAs/p${SL}"
	local table_cols metro_name PL PLrankUS PLrankByCity PL_area PL_emp PL_empdens PL_TSempshare TSempshare_outsidePL
	texsave `table_cols' using "$temp/tables/US-CBSAs/p${SL}/TAB_1_PL_SUMMARY_A.tex", ///
		title("Prime locations in US cities") size("footnotesize") width(16cm) align(llcccccc) varlabels replace frag footnote(".")
	
* Script ends
	
	
